;; Predicated floating-point unary arithmetic, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 3)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")]
SVE_COND_FP_UNARY)
(match_dup 2)]
UNSPEC_SEL))]
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")]
SVE_COND_FP_UNARY)
- (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \
+ void \
+ f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
+ TYPE0 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = p[i] ? FN (a[i]) : MERGE; \
+ }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_fabsf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_fabsf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_fabsf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define NEG(X) -X
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \
+ void \
+ f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
+ TYPE0 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = p[i] ? FN (a[i]) : MERGE; \
+ }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (NEG, _Float16, uint64_t, 32)
+
+TEST_ALL (NEG, _Float16, uint32_t, 64)
+
+TEST_ALL (NEG, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \
+ void \
+ f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
+ TYPE0 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = p[i] ? FN (a[i]) : MERGE; \
+ }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_roundf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_roundf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_roundf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -mtune=generic -ftree-vectorize" } */
+
+#include "unpacked_cond_frinta_1.c"
+
+/* Test that we don't drop SELs without -fno-trapping-math. */
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tsel\t} 6 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \
+ void \
+ f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
+ TYPE0 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = p[i] ? FN (a[i]) : MERGE; \
+ }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_nearbyintf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_nearbyintf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_nearbyintf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \
+ void \
+ f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
+ TYPE0 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = p[i] ? FN (a[i]) : MERGE; \
+ }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_floorf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_floorf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_floorf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \
+ void \
+ f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
+ TYPE0 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = p[i] ? FN (a[i]) : MERGE; \
+ }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_ceilf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_ceilf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_ceilf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \
+ void \
+ f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
+ TYPE0 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = p[i] ? FN (a[i]) : MERGE; \
+ }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_rintf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_rintf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_rintf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \
+ void \
+ f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
+ TYPE0 *__restrict out, \
+ TYPE0 *__restrict a, \
+ TYPE0 *__restrict b) \
+ { \
+ for (unsigned int i = 0; i < COUNT; i++) \
+ out[i] = p[i] ? FN (a[i]) : MERGE; \
+ }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
+
+TEST_ALL (__builtin_truncf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_truncf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_truncf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */