)
;; For EOR (vector, register) and SVE EOR (vector, immediate)
-(define_insn "xor<mode>3<vczle><vczbe>"
+(define_insn "@xor<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand")
(xor:VDQ_I (match_operand:VDQ_I 1 "register_operand")
(match_operand:VDQ_I 2 "aarch64_reg_or_xor_imm")))]
;; Patterns comparing two vectors and conditionally jump
+;; Define cbranch on masks. This optab is only called for BOOLEAN_VECTOR_TYPE_P
+;; which allows optimizing compares with zero.
(define_expand "cbranch<mode>4"
[(set (pc)
(if_then_else
DONE;
})
+;; Define vec_cbranch_any and vec_cbranch_all
+;; Vector comparison and branch for Adv. SIMD types.  Uses SVE compare
+;; instructions when TARGET_SVE, otherwise an Adv. SIMD compare and reduction.
+(define_expand "<optab><mode>"
+ [(set (pc)
+ (unspec:VALL
+ [(if_then_else
+ (match_operator 0 "aarch64_cbranch_compare_operation"
+ [(match_operand:VALL 1 "register_operand")
+ (match_operand:VALL 2 "aarch64_simd_reg_or_zero")])
+ (label_ref (match_operand 3 ""))
+ (pc))]
+ CBRANCH_CMP))]
+ "TARGET_SIMD"
+{
+ auto code = GET_CODE (operands[0]);
+ if (TARGET_SVE)
+ {
+ /* Do the comparison in the full SVE mode that has the same element
+ mode, viewing the Adv. SIMD inputs as the low part of that mode. */
+ machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require ();
+
+ rtx in1 = force_lowpart_subreg (full_mode, operands[1], <MODE>mode);
+ rtx in2;
+ if (CONST0_RTX (<MODE>mode) == operands[2])
+ in2 = CONST0_RTX (full_mode);
+ else
+ in2 = force_lowpart_subreg (full_mode, operands[2], <MODE>mode);
+
+ /* NOTE(review): `lanes' is the size of <MODE>mode in bytes; presumably
+ aarch64_ptrue_reg takes a count of VNx16BI elements so that the
+ predicate only covers the Adv. SIMD part of the register -- confirm. */
+ unsigned lanes
+ = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant ();
+ machine_mode pred_mode = aarch64_sve_pred_mode (full_mode);
+ rtx ptrue = aarch64_ptrue_reg (VNx16BImode, lanes);
+ rtx hint = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+
+ rtx tmp = gen_reg_rtx (pred_mode);
+ rtx cast_ptrue = gen_lowpart (pred_mode, ptrue);
+
+ /* Floating-point modes: compare, mask the result with the predicate and
+ then emit a separate PTEST. Other modes: use the combined
+ compare-and-ptest pattern. */
+ if (FLOAT_MODE_P (full_mode))
+ {
+ aarch64_expand_sve_vec_cmp<sve_cmp_suff> (tmp, code, in1, in2);
+ emit_insn (gen_and3 (pred_mode, tmp, tmp, cast_ptrue));
+ emit_insn (gen_aarch64_ptest (pred_mode, ptrue, cast_ptrue, hint,
+ tmp));
+ }
+ else
+ emit_insn (gen_aarch64_pred_cmp_ptest (code, full_mode, tmp, ptrue, in1,
+ in2, cast_ptrue, hint,
+ cast_ptrue, hint));
+
+ /* Branch on the condition flags; <cbranch_op> selects EQ or NE. */
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[3]));
+ DONE;
+ }
+
+ /* No SVE: compare with Adv. SIMD into an integer vector of the same
+ shape. */
+ rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
+ emit_insn (gen_vec_cmp<mode><v_int_equiv> (tmp, operands[0], operands[1],
+ operands[2]));
+
+ /* For 128-bit vectors we need a reduction to 64-bit first. */
+ if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+ {
+ /* Always reduce using a V4SI. */
+ rtx reduc = gen_lowpart (V4SImode, tmp);
+ rtx res = gen_reg_rtx (V4SImode);
+ emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
+ emit_move_insn (tmp, gen_lowpart (<V_INT_EQUIV>mode, res));
+ }
+
+ /* Compare the low 64 bits of the mask against zero and branch on that. */
+ rtx val = gen_reg_rtx (DImode);
+ emit_move_insn (val, gen_lowpart (DImode, tmp));
+
+ rtx cc_reg = aarch64_gen_compare_reg (<cbranch_op>, val, const0_rtx);
+ rtx cmp_rtx = gen_rtx_fmt_ee (<cbranch_op>, DImode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[3]));
+ DONE;
+})
+
;; Patterns comparing two vectors to produce a mask.
(define_expand "vec_cmp<mode><mode>"
;; - PTEST
;; -------------------------------------------------------------------------
-;; Branch based on predicate equality or inequality.
+;; Branch based on predicate equality or inequality. This allows PTEST to be
+;; combined with other flag setting instructions like ORR -> ORRS.
(define_expand "cbranch<mode>4"
[(set (pc)
(if_then_else
}
)
+;; Define vec_cbranch_any and vec_cbranch_all
+;; Branch based on predicate equality or inequality.
+(define_expand "<optab><mode>"
+ [(set (pc)
+ (unspec:PRED_ALL
+ [(if_then_else
+ (match_operator 0 "aarch64_equality_operator"
+ [(match_operand:PRED_ALL 1 "register_operand")
+ (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
+ (label_ref (match_operand 3 ""))
+ (pc))]
+ CBRANCH_CMP))]
+ ""
+ {
+ /* All-true predicate that governs both the XOR below and the PTEST. */
+ rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
+ rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
+ rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
+ /* A comparison against zero tests operand 1 directly; otherwise test
+ the XOR of the two predicates. */
+ rtx pred;
+ if (operands[2] == CONST0_RTX (<MODE>mode))
+ pred = operands[1];
+ else
+ {
+ pred = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
+ operands[2]));
+ }
+ emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
+
+ /* Branch on the condition flags; <cbranch_op> selects EQ or NE. */
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[3]));
+ DONE;
+ }
+)
+
+;; Define cond_vec_cbranch_any and cond_vec_cbranch_all
+;; Predicated vector comparison and branch for SVE integer modes.  The
+;; comparison is emitted as a combined compare-and-ptest pattern, which
+;; allows us to avoid a separate ptest.
+(define_expand "<optab><mode>"
+ [(set (pc)
+ (unspec:SVE_I
+ [(if_then_else
+ (match_operator 0 "aarch64_comparison_operator"
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")])
+ (label_ref (match_operand 4 ""))
+ (pc))]
+ COND_CBRANCH_CMP))]
+ ""
+{
+ auto code = GET_CODE (operands[0]);
+ rtx in1 = operands[2];
+ rtx in2 = operands[3];
+
+ /* Operand 1 is the governing predicate. It is passed to the pattern
+ below both as a VNx16BI value and in its own predicate mode. */
+ rtx res = gen_reg_rtx (<VPRED>mode);
+ rtx gp = gen_lowpart (VNx16BImode, operands[1]);
+ rtx cast_gp = operands[1];
+ rtx flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+
+ /* Emit the comparison and the flag-setting test as a single pattern. */
+ emit_insn (gen_aarch64_pred_cmp_ptest (code, <MODE>mode, res, gp, in1, in2,
+ cast_gp, flag, cast_gp, flag));
+
+ /* Branch on the condition flags; <cbranch_op> selects EQ or NE. */
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[4]));
+ DONE;
+})
+
;; See "Description of UNSPEC_PTEST" above for details.
-(define_insn "aarch64_ptest<mode>"
+(define_insn "@aarch64_ptest<mode>"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
(match_operand 1)
UNSPEC_SSHLL ; Used in aarch64-simd.md.
UNSPEC_USHLL ; Used in aarch64-simd.md.
UNSPEC_ADDP ; Used in aarch64-simd.md.
+ UNSPEC_CMP_ALL ; Used in aarch64-simd.md.
+ UNSPEC_CMP_ANY ; Used in aarch64-simd.md.
+ UNSPEC_COND_CMP_ALL ; Used in aarch64-sve.md.
+ UNSPEC_COND_CMP_ANY ; Used in aarch64-sve.md.
UNSPEC_TBL ; Used in vector permute patterns.
UNSPEC_TBLQ ; Used in vector permute patterns.
UNSPEC_TBX ; Used in vector permute patterns.
(VNx16SI "vnx4bi") (VNx16SF "vnx4bi")
(VNx8DI "vnx2bi") (VNx8DF "vnx2bi")])
+;; Map mode to suffix for using an SVE comparison
+(define_mode_attr sve_cmp_suff [(V8QI "_int") (V16QI "_int")
+ (V4HI "_int") (V8HI "_int") (V2SI "_int")
+ (V4SI "_int") (V2DI "_int")
+ (V2SF "_float") (V4SF "_float") (V2DF "_float")])
+
(define_mode_attr VDOUBLE [(VNx16QI "VNx32QI")
(VNx8HI "VNx16HI") (VNx8HF "VNx16HF")
(VNx8BF "VNx16BF")
(define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD])
+(define_int_iterator CBRANCH_CMP [UNSPEC_CMP_ALL UNSPEC_CMP_ANY])
+(define_int_iterator COND_CBRANCH_CMP [UNSPEC_COND_CMP_ALL UNSPEC_COND_CMP_ANY])
+
(define_int_iterator BSL_DUP [1 2])
(define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
(UNSPEC_COND_SCVTF "float")
(UNSPEC_COND_SMAX "smax")
(UNSPEC_COND_SMIN "smin")
- (UNSPEC_COND_UCVTF "floatuns")])
+ (UNSPEC_COND_UCVTF "floatuns")
+ (UNSPEC_CMP_ALL "vec_cbranch_all")
+ (UNSPEC_CMP_ANY "vec_cbranch_any")
+ (UNSPEC_COND_CMP_ALL "cond_vec_cbranch_all")
+ (UNSPEC_COND_CMP_ANY "cond_vec_cbranch_any")])
+
+(define_int_attr cbranch_op [(UNSPEC_CMP_ALL "EQ")
+ (UNSPEC_CMP_ANY "NE")
+ (UNSPEC_COND_CMP_ALL "EQ")
+ (UNSPEC_COND_CMP_ANY "NE")])
(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan")
(UNSPEC_FMAXNM "fmax")
(define_special_predicate "aarch64_equality_operator"
(match_code "eq,ne"))
+(define_special_predicate "aarch64_cbranch_compare_operation"
+ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,
+ ordered,unlt,unle,unge,ungt")
+{
+ return TARGET_SIMD;
+})
+
(define_special_predicate "aarch64_carry_operation"
(match_code "ltu,geu")
{
** ...
** ld1w z[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\]
** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
** ...
*/
** f1:
** ...
** cmpgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f1 ()
** f2:
** ...
** cmpge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f2 ()
** f3:
** ...
** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f3 ()
** f4:
** ...
** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f4 ()
** f5:
** ...
** cmplt p[0-9]+.s, p7/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) .L[0-9]+
+** b(\.?eq|\.none) .L[0-9]+
** ...
*/
void f5 ()
** f6:
** ...
** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
-** ptest p[0-9]+, p[0-9]+.b
-** b.(any|none) \.L[0-9]+
+** b(\.?eq|\.none) \.L[0-9]+
** ...
*/
void f6 ()
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE int
+#endif
+#ifndef FMT
+#define FMT "d"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparison functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 --param aarch64-autovec-preference=sve-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE int
+#endif
+#ifndef FMT
+#define FMT "d"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparison functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE float
+#endif
+#ifndef FMT
+#define FMT ".6f"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparison functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O3 --param aarch64-autovec-preference=sve-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE float
+#endif
+#ifndef FMT
+#define FMT ".6f"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparison functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+#include <math.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE float
+#endif
+#ifndef FMT
+#define FMT ".6f"
+#endif
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate comparison functions */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+/* Example unordered-sensitive loop: breaks if a[i] is unordered with 0 */
+__attribute__((noipa))
+void f7(void) {
+ for (int i = 0; i < N; i++) {
+ b[i] += a[i];
+ if (__builtin_isunordered(a[i], 0.0f))
+ break;
+ }
+}
+
+__attribute__((noreturn))
+static inline void __abort_trace(const char *m, int i, TYPE result, TYPE expected) {
+ printf("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort();
+}
+
+/* Array setup */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value checks: fail when NaN-ness differs or non-NaN values are unequal.  */
+#define CHECK_EQ(_i, _val) do {                                    \
+    if (__builtin_isnan (_val) != __builtin_isnan (b[_i])          \
+        || (!__builtin_isnan (_val) && b[_i] != _val))             \
+      __abort_trace ("single", _i, b[_i], _val);                   \
+} while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) do {                    \
+    _Pragma("GCC novector")                                        \
+    for (int i = _start; i < _end; ++i)                            \
+      if (__builtin_isnan (_val) != __builtin_isnan (b[i])         \
+          || (!__builtin_isnan (_val) && b[i] != _val))            \
+        __abort_trace ("range", i, b[i], _val);                    \
+} while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC(f1, 1.0f, 0, 1.0f, 10.0f, CHECK_EQ(0, 11.0f); CHECK_EQ(1, 10.0f));
+ TEST_FUNC(f2, -1.0f, 5, 0.0f, 10.0f, CHECK_EQ(0, 9.0f); CHECK_EQ(5, 10.0f));
+ TEST_FUNC(f3, 3.0f, 3, 0.0f, 0.0f, CHECK_EQ(0, 3.0f); CHECK_EQ(3, 0.0f));
+ TEST_FUNC(f4, 0.0f, 4, 1.0f, 1.0f, CHECK_EQ(4, 2.0f); CHECK_EQ(5, 1.0f));
+ TEST_FUNC(f5, 1.0f, 6, -1.0f, 5.0f, CHECK_EQ(6, 4.0f); CHECK_EQ(7, 5.0f));
+ TEST_FUNC(f6, 2.0f, 10, 0.0f, 7.0f, CHECK_EQ(10, 7.0f); CHECK_EQ(11, 7.0f));
+
+ /* Break on last iteration. */
+ TEST_FUNC(f1, 0.0f, N - 1, 1.0f, 1.0f,
+ CHECK_RANGE_EQ(0, N - 1, 1.0f); CHECK_EQ(N - 1, 2.0f));
+
+ TEST_FUNC(f2, -5.0f, N - 1, 0.0f, 9.0f,
+ CHECK_RANGE_EQ(0, N - 1, 4.0f); CHECK_EQ(N - 1, 9.0f));
+
+ TEST_FUNC(f3, 2.0f, N - 1, 0.0f, 0.0f,
+ CHECK_RANGE_EQ(0, N - 1, 2.0f); CHECK_EQ(N - 1, 0.0f));
+
+ TEST_FUNC(f4, 0.0f, N - 1, 2.0f, 1.0f,
+ CHECK_RANGE_EQ(0, N - 1, 1.0f); CHECK_EQ(N - 1, 3.0f));
+
+ TEST_FUNC(f5, 2.0f, N - 1, -3.0f, 6.0f,
+ CHECK_RANGE_EQ(0, N - 1, 8.0f); CHECK_EQ(N - 1, 3.0f));
+
+ TEST_FUNC(f6, 5.0f, N - 1, 0.0f, 7.0f,
+ CHECK_RANGE_EQ(0, N - 1, 12.0f); CHECK_EQ(N - 1, 7.0f));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC(f1, 0.0f, -1, 0.0f, 2.0f,
+ CHECK_RANGE_EQ(0, N, 2.0f));
+
+ TEST_FUNC(f2, -2.0f, -1, 0.0f, 5.0f,
+ CHECK_RANGE_EQ(0, N, 3.0f));
+
+ TEST_FUNC(f3, 1.0f, -1, 0.0f, 0.0f,
+ CHECK_RANGE_EQ(0, N, 1.0f));
+
+ TEST_FUNC(f4, 0.0f, -1, 0.0f, 7.0f,
+ CHECK_RANGE_EQ(0, N, 7.0f));
+
+ TEST_FUNC(f5, 1.0f, -1, 0.0f, 4.0f,
+ CHECK_RANGE_EQ(0, N, 5.0f));
+
+ TEST_FUNC(f6, 5.0f, -1, 0.0f, 3.0f,
+ CHECK_RANGE_EQ(0, N, 8.0f));
+
+#if !defined(__FAST_MATH__)
+ /* Unordered break (NAN in a[i]) */
+ TEST_FUNC(f7, 1.0f, 123, NAN, 2.0f,
+ CHECK_RANGE_EQ(0, 123, 3.0f); CHECK_EQ(123, NAN));
+#endif
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdio.h>
+
+#define N 640
+#ifndef TYPE
+#define TYPE float
+#endif
+#ifndef FMT
+#define FMT ".6f"
+#endif
+
+
+TYPE a[N] = {0};
+TYPE b[N] = {0};
+
+char *curr_test;
+
+/* Macro to define a function with a specific comparison */
+#define DEFINE_TEST_FUNC(NAME, OP) \
+ __attribute__((noipa)) \
+ void NAME(void) { \
+ for (int i = 0; i < N; i++) { \
+ b[i] += a[i]; \
+ if (a[i] OP 0) \
+ break; \
+ } \
+ }
+
+/* Generate the six comparison functions using the macro. */
+DEFINE_TEST_FUNC(f1, >)
+DEFINE_TEST_FUNC(f2, >=)
+DEFINE_TEST_FUNC(f3, ==)
+DEFINE_TEST_FUNC(f4, !=)
+DEFINE_TEST_FUNC(f5, <)
+DEFINE_TEST_FUNC(f6, <=)
+
+__attribute__((noreturn))
+static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected)
+{
+ printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n",
+ m, __FILE__, __LINE__, curr_test, expected, result, i);
+ __builtin_abort ();
+}
+
+/* Array setup macro. */
+#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = 0; i < N; ++i) { \
+ a[i] = _aval; \
+ b[i] = _bval; \
+ } \
+ if (_idx >= 0 && _idx < N) \
+ a[_idx] = _force; \
+ } while (0)
+
+/* Value check macros. */
+#define CHECK_EQ(_i, _val) \
+ do { \
+ if (b[_i] != _val) \
+ __abort_trace ("single", _i, b[_i], _val); \
+ } while (0)
+
+#define CHECK_RANGE_EQ(_start, _end, _val) \
+ do { \
+ _Pragma("GCC novector") \
+ for (int i = _start; i < _end; ++i) \
+ if (b[i] != _val) \
+ __abort_trace ("range", i, b[i], _val); \
+ } while (0)
+
+#define str(s) #s
+#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \
+ do { \
+ curr_test = str (_func); \
+ RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \
+ _func(); \
+ _check_stmt; \
+ } while (0)
+
+int main(void) {
+ /* Break on random intervals. */
+ TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
+ TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
+ TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
+ TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1));
+ TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
+ TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
+
+ /* Break on last iteration. */
+ TEST_FUNC (f1, 0, N-1, 1, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
+
+ TEST_FUNC (f2, -5, N-1, 0, 9,
+ CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9));
+
+ TEST_FUNC (f3, 2, N-1, 0, 0,
+ CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0));
+
+ TEST_FUNC (f4, 0, N-1, 2, 1,
+ CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f5, 2, N-1, -3, 6,
+ CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3));
+
+ TEST_FUNC (f6, 5, N-1, 0, 7,
+ CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7));
+
+ /* Condition never met — full loop executes. */
+ TEST_FUNC (f1, 0, -1, 0, 2,
+ CHECK_RANGE_EQ (0, N, 2));
+
+ TEST_FUNC (f2, -2, -1, 0, 5,
+ CHECK_RANGE_EQ (0, N, 3));
+
+ TEST_FUNC (f3, 1, -1, 0, 0,
+ CHECK_RANGE_EQ (0, N, 1));
+
+ TEST_FUNC (f4, 0, -1, 0, 7,
+ CHECK_RANGE_EQ (0, N, 7));
+
+ TEST_FUNC (f5, 1, -1, 0, 4,
+ CHECK_RANGE_EQ (0, N, 5));
+
+ TEST_FUNC (f6, 5, -1, 0, 3,
+ CHECK_RANGE_EQ (0, N, 8));
+
+ return 0;
+}
/*
** f3:
** ...
-** cmeq v[0-9]+.4s, v[0-9]+.4s, #0
** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
** fmov x[0-9]+, d[0-9]+
** cbn?z x[0-9]+, \.L[0-9]+
/*
** f4:
** ...
-** cmtst v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
** fmov x[0-9]+, d[0-9]+
** cbn?z x[0-9]+, \.L[0-9]+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+sve"
+
+#define N 640
+int a[N] = {0};
+int b[N] = {0};
+/*
+** f1:
+** ...
+** cmpgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f1 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] > 0)
+ break;
+ }
+}
+/*
+** f2:
+** ...
+** cmpge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f2 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] >= 0)
+ break;
+ }
+}
+/*
+** f3:
+** ...
+** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f3 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] == 0)
+ break;
+ }
+}
+/*
+** f4:
+** ...
+** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f4 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] != 0)
+ break;
+ }
+}
+/*
+** f5:
+** ...
+** cmplt p[0-9]+.s, p7/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) .L[0-9]+
+** ...
+*/
+void f5 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] < 0)
+ break;
+ }
+}
+/*
+** f6:
+** ...
+** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f6 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] <= 0)
+ break;
+ }
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+sve"
+
+#define N 640
+float a[N] = {0};
+float b[N] = {0};
+
+/*
+** f1:
+** ...
+** fcmgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f1 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] > 0)
+ break;
+ }
+}
+/*
+** f2:
+** ...
+** fcmge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f2 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] >= 0)
+ break;
+ }
+}
+/*
+** f3:
+** ...
+** fcmeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f3 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] == 0)
+ break;
+ }
+}
+/*
+** f4:
+** ...
+** fcmne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f4 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] != 0)
+ break;
+ }
+}
+/*
+** f5:
+** ...
+** fcmlt p[0-9]+.s, p7/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) .L[0-9]+
+** ...
+*/
+void f5 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] < 0)
+ break;
+ }
+}
+/*
+** f6:
+** ...
+** fcmle p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0
+** ptest p[0-9]+, p[0-9]+\.b
+** b(\.?eq|\.none) \.L[0-9]+
+** ...
+*/
+void f6 ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ b[i] += a[i];
+ if (a[i] <= 0)
+ break;
+ }
+}